{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import datetime\n",
    "import lightgbm as lgb\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.cluster import KMeans\n",
    "import cufflinks as cf\n",
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "cf.set_config_file(world_readable=True,theme='pearl')\n",
    "from IPython.core.interactiveshell import InteractiveShell  \n",
    "InteractiveShell.ast_node_interactivity = \"all\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('../data/ceping.csv',encoding='gb18030')\n",
    "df_1=df[df['年级段']==1]\n",
    "df_2=df[df['年级段']==2]\n",
    "df_3=df[df['年级段']==3]\n",
    "#小学\n",
    "df_z_1 = df_1[['Z学习动力','Z学习能力','Z学习毅力']]\n",
    "df_t_1 = df_1[['T学习动力','T学习能力','T学习毅力']]\n",
    "#初中\n",
    "df_z_2 = df_2[['Z学习动力','Z学习能力','Z学习毅力']]\n",
    "df_t_2 = df_2[['T学习动力','T学习能力','T学习毅力']]\n",
    "#高中\n",
    "df_z_3 = df_3[['Z学习动力','Z学习能力','Z学习毅力']]\n",
    "df_t_3 = df_3[['T学习动力','T学习能力','T学习毅力']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df_z.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 小学"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
       "    n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-1.7104409  -0.82869383 -2.41358212]\n",
      " [ 0.85800571  0.13972035  0.78649876]\n",
      " [-0.77020248 -0.13100005 -0.82359222]\n",
      " [-0.12351863  0.10012643  0.20786221]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\Anaconda3\\lib\\site-packages\\IPython\\core\\display.py:689: UserWarning:\n",
      "\n",
      "Consider using IPython.display.IFrame instead\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~tongzheming/270.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<chart_studio.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = df_z_1\n",
    "X = X.reset_index(drop=True)\n",
    "km = KMeans(n_clusters=4)\n",
    "km.fit(X)\n",
    "_=km.predict(X)\n",
    "labels = km.labels_\n",
    "\n",
    "centers = km.cluster_centers_\n",
    "print(centers)\n",
    "\n",
    "df_z_1['categories']=labels\n",
    "df_z_1['categories']=df_z_1.categories.apply(lambda x: 'category'+str(x+1))\n",
    "\n",
    "df_z_1.iplot(kind='scatter3d',\n",
    "           x='Z学习动力',y='Z学习能力',z='Z学习毅力',\n",
    "           size=5,\n",
    "           categories='categories',\n",
    "           xTitle='学习动力',\n",
    "           yTitle='学习能力',\n",
    "           zTitle='学习毅力',\n",
    "           title='小学Z分数均值聚类',\n",
    "           opacity=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
       "    n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[48.76481373 51.00126432 52.07862209]\n",
      " [32.89559102 41.71306166 25.86417876]\n",
      " [42.29797516 48.68999954 41.76407779]\n",
      " [58.58005705 51.39720353 57.86498757]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\Anaconda3\\lib\\site-packages\\IPython\\core\\display.py:689: UserWarning:\n",
      "\n",
      "Consider using IPython.display.IFrame instead\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~tongzheming/272.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<chart_studio.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = df_t_1\n",
    "X = X.reset_index(drop=True)\n",
    "km = KMeans(n_clusters=4)\n",
    "km.fit(X)\n",
    "_=km.predict(X)\n",
    "labels = km.labels_\n",
    "\n",
    "centers = km.cluster_centers_\n",
    "print(centers)\n",
    "\n",
    "df_t_1['categories']=labels\n",
    "df_t_1['categories']=df_t_1.categories.apply(lambda x: 'category'+str(x+1))\n",
    "\n",
    "df_t_1.iplot(kind='scatter3d',\n",
    "           x='T学习动力',y='T学习能力',z='T学习毅力',\n",
    "           size=5,\n",
    "           categories='categories',\n",
    "           xTitle='学习动力',\n",
    "           yTitle='学习能力',\n",
    "           zTitle='学习毅力',\n",
    "           title='小学T分数均值聚类',\n",
    "           opacity=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 初中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
       "    n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.34728429 -0.1928449  -0.48817645]\n",
      " [ 1.08596863  0.50568121  1.20968376]\n",
      " [ 0.24668028  0.17967173  0.37839436]\n",
      " [-1.41755646 -0.76064604 -1.51202716]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\Anaconda3\\lib\\site-packages\\IPython\\core\\display.py:689: UserWarning:\n",
      "\n",
      "Consider using IPython.display.IFrame instead\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~tongzheming/274.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<chart_studio.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = df_z_2\n",
    "X = X.reset_index(drop=True)\n",
    "km = KMeans(n_clusters=4)\n",
    "km.fit(X)\n",
    "_=km.predict(X)\n",
    "labels = km.labels_\n",
    "\n",
    "centers = km.cluster_centers_\n",
    "print(centers)\n",
    "\n",
    "df_z_2['categories']=labels\n",
    "df_z_2['categories']=df_z_2.categories.apply(lambda x: 'category'+str(x+1))\n",
    "\n",
    "df_z_2.iplot(kind='scatter3d',\n",
    "           x='Z学习动力',y='Z学习能力',z='Z学习毅力',\n",
    "           size=5,\n",
    "           categories='categories',\n",
    "           xTitle='学习动力',\n",
    "           yTitle='学习能力',\n",
    "           zTitle='学习毅力',\n",
    "           title='初中Z分数均值聚类',\n",
    "           opacity=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
       "    n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[52.44287054 51.78217997 53.75669232]\n",
      " [46.52715706 48.07155095 45.11823546]\n",
      " [35.82443536 42.39353961 34.87972836]\n",
      " [60.80315565 55.04488854 62.04763718]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\Anaconda3\\lib\\site-packages\\IPython\\core\\display.py:689: UserWarning:\n",
      "\n",
      "Consider using IPython.display.IFrame instead\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~tongzheming/276.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<chart_studio.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = df_t_2\n",
    "X = X.reset_index(drop=True)\n",
    "km = KMeans(n_clusters=4)\n",
    "km.fit(X)\n",
    "_=km.predict(X)\n",
    "labels = km.labels_\n",
    "\n",
    "centers = km.cluster_centers_\n",
    "print(centers)\n",
    "\n",
    "df_t_2['categories']=labels\n",
    "df_t_2['categories']=df_t_2.categories.apply(lambda x: 'category'+str(x+1))\n",
    "\n",
    "df_t_2.iplot(kind='scatter3d',\n",
    "           x='T学习动力',y='T学习能力',z='T学习毅力',\n",
    "           size=5,\n",
    "           categories='categories',\n",
    "           xTitle='学习动力',\n",
    "           yTitle='学习能力',\n",
    "           zTitle='学习毅力',\n",
    "           title='初中T分数均值聚类',\n",
    "           opacity=1)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 高中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
       "    n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.78714475 -0.26686114 -0.65721851]\n",
      " [-2.08810129 -1.03611538 -2.61331952]\n",
      " [ 0.92126528  0.29791421  0.8485879 ]\n",
      " [ 0.1074442   0.08545512  0.14422969]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\Anaconda3\\lib\\site-packages\\IPython\\core\\display.py:689: UserWarning:\n",
      "\n",
      "Consider using IPython.display.IFrame instead\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~tongzheming/278.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<chart_studio.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = df_z_3\n",
    "X = X.reset_index(drop=True)\n",
    "km = KMeans(n_clusters=4)\n",
    "km.fit(X)\n",
    "_=km.predict(X)\n",
    "labels = km.labels_\n",
    "\n",
    "centers = km.cluster_centers_\n",
    "print(centers)\n",
    "\n",
    "df_z_3['categories']=labels\n",
    "df_z_3['categories']=df_z_3.categories.apply(lambda x: 'category'+str(x+1))\n",
    "\n",
    "df_z_3.iplot(kind='scatter3d',\n",
    "           x='Z学习动力',y='Z学习能力',z='Z学习毅力',\n",
    "           size=5,\n",
    "           categories='categories',\n",
    "           xTitle='学习动力',\n",
    "           yTitle='学习能力',\n",
    "           zTitle='学习毅力',\n",
    "           title='高中Z分数均值聚类',\n",
    "           opacity=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
       "    n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[51.02204701 50.83305947 51.46642411]\n",
      " [29.11898707 39.63884621 23.86680479]\n",
      " [42.1285525  47.33138857 43.42781485]\n",
      " [59.22971226 52.99562074 58.39371776]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\Anaconda3\\lib\\site-packages\\IPython\\core\\display.py:689: UserWarning:\n",
      "\n",
      "Consider using IPython.display.IFrame instead\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~tongzheming/280.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<chart_studio.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = df_t_3\n",
    "X = X.reset_index(drop=True)\n",
    "km = KMeans(n_clusters=4)\n",
    "km.fit(X)\n",
    "_=km.predict(X)\n",
    "labels = km.labels_\n",
    "\n",
    "centers = km.cluster_centers_\n",
    "print(centers)\n",
    "\n",
    "df_t_3['categories']=labels\n",
    "df_t_3['categories']=df_t_3.categories.apply(lambda x: 'category'+str(x+1))\n",
    "\n",
    "df_t_3.iplot(kind='scatter3d',\n",
    "           x='T学习动力',y='T学习能力',z='T学习毅力',\n",
    "           size=5,\n",
    "           categories='categories',\n",
    "           xTitle='学习动力',\n",
    "           yTitle='学习能力',\n",
    "           zTitle='学习毅力',\n",
    "           title='高中T分数均值聚类',\n",
    "           opacity=1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
